---
title: "Cerebral oxygenation manuscript figures"
date: "`r Sys.Date()`"
output: pdf_document
---

<style type="text/css">
  body{
  font-family: Arial;
  font-size: 12pt;
}
</style>

```{r setup, include=FALSE}

knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE, comment = "", fig.height = 6, fig.width = 10, 
                      fig.align = 'center')

# Load libraries
library(tidyverse)
library(readxl)
library(DescTools)
library(janitor)
library(FSA)
library(factoextra)
library(ggrepel)
library(ggpubr)
library(tidymodels)
library(vip)
library(scales)
library(reshape)
library(extrafont)
library(grDevices)
library(psych)

# Load data
base_dir <- "/Users/ray/Library/CloudStorage/OneDrive-NationalInstitutesofHealth/Ackerman Lab 2019-2021/cerebral_oxygenation/"

load(paste0(base_dir, "objects/20221125_final_glm_models.Rdata"))
load(paste0(base_dir, "objects/glm_model.Rdata"))
load(paste0(base_dir, "objects/brain_bandpass_filtered_dfa_results.Rdata"))
load(paste0(base_dir, "objects/sims.Rdata"))

# ALT LOADING: these objects are located in a directory 3 levels above, i.e.:
load("../../../objects/20221125_final_glm_models.Rdata")

font_import()

```

```{r data}

# read in data

df_hbox <- read_csv(paste0(base_dir, "Data/Hans_datatable_exports/malawi key data v04Jan2022.csv")) %>% 
  as_tibble() %>% 
  janitor::clean_names() %>% 
  mutate(status = replace(status, status == "HV", "HC")) %>% 
  dplyr::rename("subject_id" = "subject_id_session") %>% 
  
  dplyr::filter(status %in% c("CM", "HC", "UM") & session_no == 1 | subject_id == "TM0003CM01") %>% 
  dplyr::filter(!grepl("blood", subject_id)) %>% 
  mutate(status = factor(status, levels = c("HC", "UM", "CM"))) %>% 
  mutate_at(c("bp_dias", "bp_sys", "glucose", "lactate"), as.numeric) %>% # converts remaining character variables to numerics
  select(subject_id, status, everything()) %>% 
  group_by(status)

```

### Table 1. Baseline clinical characteristics

#### Table 1a. All clinical data
```{r table1a, eval = FALSE}

# number of subjects 
df_status <- df_hbox %>% 
  count() %>% 
  pivot_wider(names_from = "status", values_from = "n") %>% 
  mutate(variable = "Number of subjects") %>% 
  dplyr::select(variable, everything()) %>% 
  mutate_if(is.numeric, as.character)

# Sex
df_sex <- df_hbox %>% 
  count(sex) %>% 
  pivot_wider(id_cols = "status", names_from = "sex", values_from = "n") %>% 
  mutate(perc_fem = round(Female/(Female + Male)*100, 2)) %>% 
  dplyr::select(-c(Female, Male)) %>% 
  
  pivot_wider(names_from = "status", values_from = "perc_fem") %>% 
  mutate(variable = "Sex (% female)") %>% 
  dplyr::select(variable, everything()) %>% 
  mutate_if(is.numeric, as.character)

# other numeric variables

  # select variables for table
  clinical_voi <- c("age_calc", "temperature", "hr", "bp_sys", "bp_dias", "resp_rate", "pulse_ox", "hct", "lactate", "glucose", "aa_arg")
  
  # calculate median, first and third quartiles
  df_clinical_res <- df_hbox %>% 
    select_at(clinical_voi) %>% 
    summarise_all(funs(median = median, q1 = quantile(., probs = 0.25), q3 = quantile(., probs = 0.75)), na.rm = TRUE) %>% 
    mutate_if(is.numeric, round, 2)
  
  # write function to paste together values
  f_unite_cols <- function(col) {
    df_clinical_res %>% 
      unite(!! col, starts_with(col), sep = "_") %>% 
      dplyr::select(!! col) 
    }
  
  # paste together and transpose
  df_clinical_res <- 1:length(clinical_voi) %>% 
    purrr::map_dfc(~f_unite_cols(clinical_voi[.x])) %>% 
    mutate_all(funs(str_replace(., "_", " ("))) %>% 
    mutate_all(funs(str_replace(., "_", ", "))) %>% 
    mutate_all(funs(paste0(., ")"))) %>% 
    
    t() %>% 
    as_tibble(rownames = "variable") %>% 
    dplyr::rename("HC" = "V1", "UM" = "V2", "CM" = "V3") %>% 
    add_row(df_status, .before = 1) %>% 
    add_row(df_sex, .before = 2) 

# export to CSV
  write.csv(df_clinical_res, file = "Outputs/table_1.csv")
  
# create kable df
  df_clinical_res %>% knitr::kable(align = rep('c', 4))

```
**Table 1a.** A summary of subject information and hematological data. Group patient data presented as median (quartile 1, quartile 3).

#### Table 1b. CM-specific data
```{r table1b, eval = FALSE}

cm_voi <- c("bcs_session", "brain_swell", "hrp2")

df_cm_voi <- df_hbox %>% 
  ungroup %>% 
  filter(status == "CM") %>% 
  select_at(cm_voi) %>% 
  mutate_at(c("bcs_session", "brain_swell"), as.factor) 
  

# BCS counts
df_cm_voi %>% count(bcs_session) %>% knitr::kable()
  
# Brain swell score counts
df_cm_voi %>% count(brain_swell) %>% knitr::kable()

# HRP
df_cm_hrp2 <- df_cm_voi %>% 
  select(cm_voi[3]) %>% 
  summarise_all(funs(median = median, 
                     q1 = quantile(., probs = 0.25), 
                     q3 = quantile(., probs = 0.75)), 
                na.rm = TRUE) %>% 
  unite(hrp2, 1:2, sep = " (") %>% 
  unite(hrp2, 1:2, sep = ", ") %>% 
  mutate(hrp2 = paste0(hrp2, ")")) 

df_cm_hrp2 %>% knitr::kable()
  
```
**Table 1b.** CM-specific clinical characteristics

### Fig 1. Hb_oxy and Hb_tot in the brain and muscle
```{r fig1}

# extract hb_tot and hb_oxy for the brain and muscle
df_fig1 <- df_hbox %>% 
  select(contains(c("hb_tot","hb_oxy")), -contains(c("alpha", "bsl"))) %>%
  ungroup() %>% 
  pivot_longer(2:ncol(.), names_to = "variable", values_to = "value") %>% 
  separate(variable, into = c("tissue", "variable"), sep = "_", extra = "merge")

# create color vector
my_col <- c("CM" = "#00BFC4", "HC" = "#7CAE00", "UM" = "#F8766D")

# plot
df_fig1 %>% 
  
  ggplot(aes(x = status, y = value)) +
  geom_violin(aes(color = status)) +
  geom_jitter(position = position_jitter(0.2), shape = 1, size = 2) +
  stat_summary(fun = "median", geom = "crossbar", aes(color = status), size = 0.2, width = 0.5) +
  facet_grid(variable ~ tissue, scales = "free_y") +
  
  scale_color_manual(values = my_col) +
  labs(x = "") +
  theme_bw() +
  theme(legend.position = "none")  +
  
  theme(# text = element_text(family = "Arial"),
        strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.title.y = element_blank(),
        axis.text = element_text(size = 15))

```
 \newpage
 
### Fig 2. Hb_oxy alpha and Hb_tot alpha in the brain and muscle
```{r fig2}

# extract hb_tot and hb_oxy for the brain and muscle
df_fig2 <- df_hbox %>% 
  select(contains("alpha2")) %>%
  ungroup() %>% 
  pivot_longer(2:ncol(.), names_to = "variable", values_to = "value") %>% 
  separate(variable, into = c("tissue", "variable"), sep = "_", extra = "merge") %>% 
  mutate(variable = str_remove(variable, "2"))

# plot
df_fig2 %>% 
  
  ggplot(aes(x = status, y = value)) +
  geom_violin(aes(color = status)) +
  geom_jitter(position = position_jitter(0.2), shape = 1, size = 2) +
  stat_summary(fun = "median", geom = "crossbar", aes(color = status), size = 0.2, width = 0.5) +
  facet_grid(variable ~ tissue, scales = "free_y") +
  
  geom_hline(yintercept = 0.4, lty = 2, alpha = 0.7) +
  geom_hline(yintercept = 0.6, lty = 2, alpha = 0.7) +
  geom_hline(yintercept = 1.0, lty = 2, alpha = 0.7) +
  geom_hline(yintercept = 1.5, lty = 2, alpha = 0.7) +
  
  scale_color_manual(values = my_col) +
  ylim(c(0, 1.5)) +
  labs(x = "") +
  theme_bw() +
  theme(legend.position = "none")  +
  
  theme(# text = element_text(family = "Arial"),
        strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.title.y = element_blank(),
        axis.text = element_text(size = 15))

```
 \newpage
 
### Fig 3. Increased cerebral_hb_tot and decreased cerebral_hb_tot_alpha are distinct clinical features of cerebral malaria
```{r fig3_data}

# select model variables of interest
model_voi <- c("subject_id", "status", 
               "hct", "lactate",
               "cerebral_hb_tot", "cerebral_hb_oxy",
               "cerebral_hb_tot_alpha2", "cerebral_hb_oxy_alpha2")

df_model <- df_hbox %>% 
  select_at(model_voi) %>% 
  
  # remove HC
  ungroup %>% 
  filter(status != "HC")

# impute missing data with median by group
  f_impute <- function(x, na.rm = TRUE) (replace(x, is.na(x), median(x, na.rm = na.rm)))
  
  df_model_impute <- df_model %>% 
    group_by(status) %>% 
    mutate_at(3:ncol(.), f_impute) %>% 
    ungroup() 

```

#### A. Correlations of variables within subject groups
```{r fig3a_cors}

# find pearson correlation across all variables of interest, including p-values and adjusted p-values

cm_cor <- df_model %>% 
  dplyr::filter(status == "CM") %>% 
  dplyr::select(-c(subject_id, status)) %>% 
  corr.test(method = "pearson", adjust = "none")

um_cor <- df_model %>% 
  dplyr::filter(status == "UM") %>% 
  dplyr::select(-c(subject_id, status)) %>% 
  corr.test(method = "pearson", adjust = "none")

# combine into tibble
df_fig3a <- cm_cor$r %>% 
  as_tibble %>% 
  mutate(var1 = colnames(.), .before = hct) %>% 
  pivot_longer(2:ncol(.), names_to = "var2", values_to = "pearsons_r") %>% 
  mutate(status = "CM", .before = var1) %>% 
  
  left_join(
    
    cm_cor$p %>% 
      as_tibble %>% 
      mutate(var1 = colnames(.), .before = hct) %>% 
      pivot_longer(2:ncol(.), names_to = "var2", values_to = "p_value"),
    
    by = c("var1", "var2")
    
  ) %>% 
  
  # adjust p-values
  mutate(adj_p_val = p.adjust(p_value, method = "BH")) %>% 
  
  bind_rows(
    
    um_cor$r %>% 
      as_tibble %>% 
      mutate(var1 = colnames(.), .before = hct) %>% 
      pivot_longer(2:ncol(.), names_to = "var2", values_to = "pearsons_r") %>% 
      mutate(status = "UM", .before = var1) %>% 
      
      left_join(
        
        um_cor$p %>% 
          as_tibble %>% 
          mutate(var1 = colnames(.), .before = hct) %>% 
          pivot_longer(2:ncol(.), names_to = "var2", values_to = "p_value"),
        
        by = c("var1", "var2")
        
      ) %>% 
      mutate(adj_p_val = p.adjust(p_value, method = "BH"))

    
  ) %>% 
  
  # set factor levels for variables
  mutate_at(2:3, factor, levels = c("hct", "lactate", "cerebral_hb_oxy", "cerebral_hb_tot", "cerebral_hb_oxy_alpha2", "cerebral_hb_tot_alpha2")) %>% 
  
  # set diagonal equal to zero
  mutate(pearsons_r = ifelse(var1 == var2, 0, pearsons_r))

```

```{r fig3a_sig_cors}

### find lowest |r| value that corresponds to a p < 0.05

t_stat_cor <- function(r, n){(r*sqrt(n - 2)) / (sqrt(1 - r^2))} # equation to calculate t-stat given correlation and n

## for CM:

# combine pearson's r and sample size
df_t_cor_cm <- cm_cor$r %>% 
  as_tibble %>% 
  mutate(var1 = colnames(.), .before = hct) %>% 
  pivot_longer(2:ncol(.), names_to = "var2", values_to = "pearsons_r") %>% 
  
  left_join(
    
    cm_cor$n %>% 
      as_tibble %>% 
      mutate(var1 = colnames(.), .before = hct) %>% 
      pivot_longer(2:ncol(.), names_to = "var2", values_to = "n")
    
  ) %>% 
  
  # calculate t-stat of each
  mutate(t = ifelse(pearsons_r != 1, t_stat_cor(pearsons_r, n), NA))

# compare with t-distribution table
lowest_sig_r_cm <- read_excel(paste0(base_dir, "Data/t_distribution_table.xlsx")) %>% 
  dplyr::rename("n" = "One Tail") %>% 
  filter(!(n %in% c("Two Tails", "df"))) %>% 
  dplyr::select(1:2) %>% 
  mutate(n = as.numeric(n)) %>% 
  
  left_join(df_t_cor_cm, by = "n") %>% 
  filter(!is.na(t)) %>% 
  
  # find significance by t-value
  mutate(is_sig = ifelse(abs(t) > `0.05`, "yes", "no")) %>% 
  
  # pull only significant correlations
  filter(is_sig == "yes") %>% 
  
  # find lowest r value associated with significant correlation
  slice(which.min(abs(pearsons_r))) %>% pull(pearsons_r)

## repeat for UM:

# combine pearson's r and sample size
df_t_cor_um <- um_cor$r %>% 
  as_tibble %>% 
  mutate(var1 = colnames(.), .before = hct) %>% 
  pivot_longer(2:ncol(.), names_to = "var2", values_to = "pearsons_r") %>% 
  
  left_join(
    
    um_cor$n %>% 
      as_tibble %>% 
      mutate(var1 = colnames(.), .before = hct) %>% 
      pivot_longer(2:ncol(.), names_to = "var2", values_to = "n")
    
  ) %>% 
  
  # calculate t-stat of each
  mutate(t = ifelse(pearsons_r != 1, t_stat_cor(pearsons_r, n), NA))

# compare with t-distribution table
lowest_sig_r_um <-  read_excel(paste0(base_dir, "Data/t_distribution_table.xlsx")) %>% 
  dplyr::rename("n" = "One Tail") %>% 
  filter(!(n %in% c("Two Tails", "df"))) %>% 
  dplyr::select(1:2) %>% 
  mutate(n = as.numeric(n)) %>% 
  
  left_join(df_t_cor_um, by = "n") %>% 
  filter(!is.na(t)) %>% 
  
  # find significance by t-value
  mutate(is_sig = ifelse(abs(t) > `0.05`, "yes", "no")) %>% 
  
  # pull only significant correlations
  filter(is_sig == "yes") %>% 
  
  # find lowest r value associated with significant correlation
  slice(which.min(abs(pearsons_r))) %>% pull(pearsons_r)

```

```{r fig3a}

df_fig3a %>% 

  ggplot(aes(x = var1, y = var2, fill = pearsons_r, label = ifelse(pearsons_r != 0, round(pearsons_r, 2), NA))) + 
  geom_tile(aes(width = 0.9, height = 0.9)) +

  facet_wrap(~ status) +
    
  scale_fill_gradient2(low = "#4575b4", high = "#d73027") +
  geom_text(size = 4) +
  
  xlab("") +
  ylab("") +
  theme_classic() +
  theme(axis.text = element_text(size = 12),
        axis.text.x = element_text(angle = 45, hjust = 0.95))

```
In the CM group (n = 46), absolute correlation values greater 0.32 are statistically significant at a threshold of p < 0.05. In the UM group (n = 36), absolute correlation values greater than the absolute value of 0.43 are statistically significant.
\newpage
 
#### B. Principal component analysis
```{r fig3b}

# convert tibble to df
df_model_pca <- df_model_impute %>% 
  dplyr::select(-c(subject_id, status)) %>% 
  as.data.frame() 

# add rownames
rownames(df_model_pca) <- df_model_impute$subject_id

# compute PCA
my_pca <- prcomp(df_model_pca, scale = TRUE)

# define groups
groups <- as.factor(df_model_impute$status)

# Eigenvalues
pca_eig_val <- get_eigenvalue(my_pca)

# two data frames for plotting
df_vars <- my_pca$rotation %>% as_tibble() %>% mutate(variable = rownames(my_pca$rotation))
df_pnts <- my_pca$x %>% 
  as_tibble() %>% 
  mutate(status = groups, 
         subject_id = df_model_impute$subject_id) 

# plot

  # plot patients
ggplot() +
  geom_point(data = df_pnts, mapping = aes(x = PC1, y = PC2, color = status), size = 3) +
  # stat_conf_ellipse(data = df_pnts, mapping = aes(x = PC1, y = PC2, color = status, fill = status),
  #                   level = 0.95, npoint = 100, bary = TRUE, alpha = 0.1, geom = "polygon") +
  # stat_mean(data = df_pnts, mapping = aes(x = PC1, y = PC2, color = status, shape = status),
  #           na.rm = FALSE) +
  
  # View patient IDs
  # geom_text_repel(data = df_pnts, aes(x = PC1, y = PC2, label = subject_id, size = 10), show.legend = FALSE) +
  
  # plot variables
  # geom_point(data = df_vars, aes(x = PC1*5, y = PC2*5)) +
  geom_text_repel(data = df_vars, aes(x = PC1*5, y = PC2*5, label = variable, size = 10), show.legend = FALSE) +
  geom_segment(data = df_vars, aes(x = 0, y = 0, xend = PC1*5, yend = PC2*5), arrow = arrow()) +
  
  # draw lines
  geom_vline(xintercept = 0, lty = 2, color = "black") +
  geom_hline(yintercept = 0, lty = 2, color = "black") +
  
  # design
  xlab(paste0("PC1: ", round(pca_eig_val$variance.percent[1], 2), "% of variance")) +
  ylab(paste0("PC2: ", round(pca_eig_val$variance.percent[2], 2), "% of variance")) +

  theme_bw() +
  theme(# text = element_text(family = "Arial"),
        axis.text = element_text(size = 20),
        axis.title = element_text(size = 25),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 20))

```
\newpage
 
#### C. Coefficients
```{r model, eval = FALSE}

### shows how logistic regression model was set up and run, results saved in objects/glm_model ###

# resample
set.seed(234)
hbox_boot <- bootstraps(df_model, strata = Status, times = 5000)

# recipe
hbox_rec <- recipe(Status ~ ., data = df_model) %>% 
  step_impute_knn(all_predictors()) %>% 
  step_normalize(all_predictors()) %>%
  step_zv(all_predictors()) %>%
  step_smote(Status) %>% 
  update_role(subject_id, new_role = "id")

# create workflow
hbox_wf <- workflow() %>% add_recipe(hbox_rec)

# logistic regression model specifications
glm_spec <- logistic_reg() %>% set_engine("glm")

# create model on 5000 different resamples
glm_res <- hbox_wf %>% 
  add_model(glm_spec) %>%
  fit_resamples(
    resamples = hbox_boot,
    metrics = metric_set(roc_auc, accuracy, sensitivity, specificity),
    control = tune::control_resamples(save_pred = TRUE, verbose = TRUE)
  )

# select best model to be used for evaluation
glm_best <- glm_res %>% select_best("roc_auc")

# finalize model
hbox_final <- hbox_wf %>% 
  add_model(glm_spec) %>% 
  finalize_workflow(glm_best) %>%
  fit(df_model) %>%
  extract_fit_parsnip()

```

```{r table3c}

df_CI <- confint(hbox_final$fit) %>% exp() %>% as_tibble(rownames = "term")

table3c <- hbox_final %>%
  tidy(exponentiate = TRUE) %>%
  filter(p.value < 0.05) %>% 
  left_join(df_CI, by = "term") %>%
  mutate(estimate = ifelse(estimate < 10^-2, scientific(estimate, digits = 3), round(estimate, 2)),
         conf.low = ifelse(`2.5 %` < 10^-2, scientific(`2.5 %`, digits = 3), round(`2.5 %`, 2)),
         conf.high = ifelse(`97.5 %` < 10^-2, scientific(`97.5 %`, digits = 3), round(`97.5 %`, 2))) %>%
  mutate(p.value = scientific(p.value, digits = 3),
         statistic = round(statistic, 2)) %>%
  unite("95% CI", c(conf.low, conf.high), sep = ", ") %>%
  dplyr::select(c(term, estimate, `95% CI`, statistic, p.value)) %>% 
  dplyr::rename("Term" = "term", "Odds ratio" = "estimate",
                "t-statistic" = "statistic", "p-value" = "p.value") %>%
  filter(Term != "(Intercept)") %>% 
  as.data.frame()

table3c %>% knitr::kable(align = rep('c', 5))

```
**note: for the OR, denominator is probability of patient having CM; numerator is p patient having UM

\newpage
 
#### D. ROC curve of final model
```{r fig3d_plot, fig.height = 8}

df_roc <- glm_res %>% 
  collect_predictions() %>% 
  roc_curve(status, .pred_CM)

ggplot() +
  geom_path(data = df_roc, 
            aes(x = 1 - specificity, y = sensitivity), 
            size = 2.5) +
  geom_abline(lty = 3) +
  labs(x = "False positive rate", y = "True positive rate") +
  theme_bw() +
  
  theme(# text = element_text(family = "Arial"),
        axis.title = element_text(size = 25),
        axis.text = element_text(size = 20),
        legend.title = element_text(size = 20),
        legend.text = element_text(size = 15),
        legend.position = "none")

```

```{r fig3d_inset}

fig3d_inset <- glm_res %>% 
  collect_metrics() %>% 
  dplyr::select(.metric, mean, std_err) %>% 
  mutate(mean = mean*100, std_err = std_err*100) %>% 
  mutate(Metric = c("Accuracy", "ROC AUC", "Sensitivity", "Specificity")) %>% 
  dplyr::select(c("Metric", "mean", "std_err")) %>% 
  
  dplyr::rename("Mean" = "mean", "Standard error" = "std_err") %>% 
  mutate_if(is.numeric, round, 2) %>% 
  as.data.frame()
    
fig3d_inset %>% knitr::kable(align = rep('c', 4))

```
\newpage
 
### Fig 4. An increase in cerebral_hb_tot is significantly associated with an increase in brain swelling score
```{r brainswell_model_comparison, eval = FALSE}

# remove cerebral_hb_tot outliers
  df_hbox_hbtot_outliers <- df_hbox %>% 
    mutate(Q1_hbtot = quantile(cerebral_hb_tot, 0.25, na.rm = TRUE),
           Q3_hbtot = quantile(cerebral_hb_tot, 0.75, na.rm = TRUE),
           IQR_hbtot = Q3_hbtot - Q1_hbtot,
           outlier_hbtot = case_when(
             
             cerebral_hb_tot < Q1_hbtot - IQR_hbtot ~ 1,
             cerebral_hb_tot > Q3_hbtot + IQR_hbtot ~ 1,
             TRUE ~ 0
             
           )
           
    ) %>% 
    filter(outlier_hbtot != 1)

lm1 <- lm(brain_swell ~ cerebral_hb_tot, data = df_hbox_hbtot_outliers)
lm2 <- lm(brain_swell ~ cerebral_hb_tot + hct, data = df_hbox_hbtot_outliers)
lm3 <- lm(brain_swell ~ cerebral_hb_tot + cerebral_hb_tot_alpha2, data = df_hbox_hbtot_outliers)
lm4 <- lm(brain_swell ~ cerebral_hb_tot + cerebral_hb_tot_alpha2 + hct, data = df_hbox_hbtot_outliers)
lm5 <- lm(brain_swell ~ cerebral_hb_oxy + cerebral_hb_oxy_alpha2 + cerebral_hb_tot, data = df_hbox_hbtot_outliers)

# evaluate parameters for all models
brain_swell_models <- list(lm1, lm2, lm3, lm4, lm5)

df_brain_swell_models <- tibble(formula = as.character(),
                                term = as.character(),
                                aic = as.numeric(),
                                r2 = as.numeric(),
                                adj_r2 = as.numeric(),
                                model_p_val = as.numeric(),
                                estimate = as.numeric(),
                                std_error = as.numeric(),
                                statistic = as.numeric(),
                                term_p_val = as.numeric())

for (i in 1:length(brain_swell_models)) {
  
  formula <- brain_swell_models[[i]] %>% .$call
  formula <- gsub(".*=", "", formula)[[2]]
  
  aic <- brain_swell_models[[i]] %>% AIC
  r2 <- brain_swell_models[[i]] %>% summary %>% .$r.squared
  adj_r2 <- brain_swell_models[[i]] %>% summary %>% .$adj.r.squared
  f <- brain_swell_models[[i]] %>% summary %>% .$fstatistic
  p <- pf(f[1], f[2], f[3], lower.tail = FALSE)
  coef <- brain_swell_models[[i]] %>% tidy %>% filter(term != "(Intercept)") %>% dplyr::rename("term_p_val" = "p.value")
  
  df_tmp <- tibble(formula = formula,
                   term = coef$term,
                   aic = aic,
                   r2 = r2,
                   adj_r2 = adj_r2,
                   model_p_val = p) %>%
    left_join(coef) %>%
    clean_names

  df_brain_swell_models <- df_brain_swell_models %>% 
    bind_rows(df_tmp) %>% 
    relocate(term, .before = estimate)
  
}

df_brain_swell_models %>% write.csv(file = paste0("outputs/brain_swell_models.csv"))

```

```{r fig4_plot}

# select model voi
brain_swell_voi <- c("brain_swell", "cerebral_hb_tot", "hct")
df_fig4 <- df_hbox %>% select_at(brain_swell_voi)

# remove cerebral_hb_tot outliers
  df_fig4_outliers <- df_fig4 %>% 
    mutate(Q1_hbtot = quantile(cerebral_hb_tot, 0.25, na.rm = TRUE),
           Q3_hbtot = quantile(cerebral_hb_tot, 0.75, na.rm = TRUE),
           IQR_hbtot = Q3_hbtot - Q1_hbtot,
           outlier_hbtot = case_when(
             
             cerebral_hb_tot < Q1_hbtot - IQR_hbtot ~ 1,
             cerebral_hb_tot > Q3_hbtot + IQR_hbtot ~ 1,
             TRUE ~ 0
             
           )
           
    ) %>% 
    filter(outlier_hbtot != 1)

# model
  lm_fig4 <- lm(brain_swell ~ cerebral_hb_tot + hct, data = df_fig4_outliers)
  f <- summary(lm_fig4)$fstatistic
  p <- pf(f[1], f[2], f[3], lower.tail = FALSE)
  
  model <- "model: brain_swell ~ cerebral_hb_tot + hematocrit"
  r2 <- paste0("adjusted R^2: ", lm_fig4 %>% summary() %>% .$adj.r.squared %>% round(2))
  model_p_val <- paste0("model: ", round(p, 3))
  hb_tot_p_val <- paste0("cerebral_hb_tot: ", lm_fig4 %>% tidy() %>% .[2, "p.value"] %>% .$p.value %>% round(3))
  
# plot variables
require(ggpmisc)

df_fig4_outliers %>% 
  ggplot(aes(x = cerebral_hb_tot, y = brain_swell)) +
  geom_point(shape = 1, aes(size = hct, color = as.factor(brain_swell))) +
  #geom_abline(intercept = ) +
  
  geom_smooth(method = "lm", lty = 2, color = "#696969", alpha = 0.2) +
  # annotate("text", label = model, x = 25, y = 8, hjust = 0, vjust = 1, size = 5) +
  # annotate("text", label = r2, x = 25, y = 7.5, hjust = 0, vjust = 1, size = 5) +
  # annotate("text", label = "p-values", x = 25, y = 7, hjust = 0, vjust = 1, size = 5) +
  # annotate("text", label = model_p_val, x = 35, y = 6.75, hjust = 0, vjust = 1, size = 4) +
  # annotate("text", label = hb_tot_p_val, x = 35, y = 6.5, hjust = 0, vjust = 1, size = 4) +
  
  lims(x = c(25, 225)) +
  theme_bw() +
  
  theme(# text = element_text(family = "Arial"),
        strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.title.y = element_text(size = 15),
        axis.text = element_text(size = 15),
        legend.title = element_text(size = 12),
        legend.text = element_text(size = 10))

# 3d plot?  
# require(plotly)
#   x <- df_fig4_outliers$cerebral_hb_tot
#   y <- df_fig4_outliers$hematocrit
#   z <- df_fig4_outliers$brain_swell
#   
#   df_fig4_outliers %>% plot_ly(x = x, y = y, z = z, type = "scatter3d", mode = "markers", color = x)

```
**Figure 4.** model: brain_swell ~ cerebral_hb_tot + hematocrit; adjusted R^2: 0.25; model p-value: 0.007; cerebral_hb_tot p-value: 0.003
 
\newpage
 
### Fig 5. Explanatory model of how to interpret hemoglobin concentration

See manuscript draft (figure created in PowerPoint)

## Supplement
### SFig 1. DFA analysis illustration

#### A. Raw signal
```{r fig_s1a}

df_figs1a <- read.table(paste0(base_dir, "Data/signal_segments/Hb_tot/TM0001CM01.txt"))

df_figs1a %>% 
  
  ggplot(aes(x = Time)) +
  geom_line(aes(y = THC)) +

  labs(y = "Hemoglobin concentration (μM)", x = "Time (s)") +
  ggtitle("Raw signal") +
  theme_bw() +
  theme(# text = element_text(family = "Arial"),
        axis.text = element_text(size = 20),
        axis.title = element_text(size = 25),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 20))

```
 \newpage
 
#### B. Processed signal
```{r fig_s1b}

df_figs1b <- read.table(paste0(base_dir, "Data/filtered_signals/0.2s_mean_0.001_1_filt/Hb_tot/TM0001CM01.txt")) %>% 
  mutate(time = row_number())

df_figs1b %>% 
  ggplot(aes(x = time)) +
  geom_line(aes(y = V1)) +

  #lims(y = c(-0.001, 0.001)) +
  
  labs(y = "Bandpass filtered signal", x = "Time") +
  ggtitle("Pre-processed signal") +
  theme_bw() +
  theme(# text = element_text(family = "Arial"),
        #axis.text = element_blank(),
        axis.title = element_text(size = 25),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 20))

```
 \newpage
 
#### C. Cumulative sum
```{r fig_s1c}

df_figs1c <- df_figs1b %>% 
  mutate(mean = mean(V1),
         subtract_mean = V1 - mean,
         cumsum = cumsum(subtract_mean))
  
df_figs1c %>% 
  
  ggplot(aes(x = time, y = cumsum)) +
  geom_line() +
  geom_vline(xintercept = 1, lty = 2, color = "black") +
  geom_vline(xintercept = 1000, lty = 2, color = "black") +
  
  labs(y = "Cumulative sum", x = "Time") +
  ggtitle("Cumulative signal") +
  theme_bw() +
  theme(# text = element_text(family = "Arial"),
        axis.text = element_blank(),
        axis.title = element_text(size = 25),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 20))

```
 \newpage
 
#### D. Window detrend
```{r fig_s1d, fig.show = "hold", out.width = "50%"}

fit <- lm(cumsum ~ time, data = df_figs1c %>% dplyr::filter(time > 1 & time < 1000))

# Window 
df_figs1c %>% dplyr::filter(time > 1 & time < 1000) %>% 
  
  ggplot(aes(x = time, y = cumsum)) +
  geom_line(size = 2) +
  geom_abline(intercept = coef(fit)[1], slope = coef(fit)[2], color = "black", lty = 2, size = 2) +
  
  labs(y = "Cumulative sum", x = "Time") +
  ggtitle("Cumulative signal") +
  theme_bw() +
  theme(# text = element_text(family = "Arial"),
        axis.text.x = element_text(size = 20),
        axis.text.y = element_blank(),
        axis.title = element_text(size = 25),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 20),
        title = element_text(size = 25))

# Detrended
df_figs1c %>% dplyr::filter(time > 1 & time < 1000) %>% 
  
  ggplot(aes(x = time, y = fit$residuals)) +
  geom_line(size = 2) +
  geom_hline(yintercept = 0, color = "black", lty = 2, size = 2) +
  
  labs(y = "", x = "Time") +
  ggtitle("Detrended cumulative signal") +
  theme_bw() +
  theme(# text = element_text(family = "Arial"),
        axis.text.x = element_text(size = 20),
        axis.text.y = element_blank(),
        axis.title = element_text(size = 28),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 20),
        title = element_text(size = 25))

```
 \newpage
 
#### E. Results plot
```{r fig_s1e}

df_figs1e <- l_dfa_hbtot[[1]][[1]]

# find slope of segmented line
library(segmented)
df_figs1e <- df_figs1e %>% mutate(log10_avg_fluct = log10(avg_fluctuation), log10_window_size = log10(window_size))
fit_figs1e <- lm(log10_avg_fluct ~ log10_window_size, data = df_figs1e) 
segfit <- segmented(fit_figs1e)
slopes <- coef(segfit) # the coefficients are the differences in slope in comparison to the previous slope

# first line: 
#y = b0 + b1*x; y = intercept1 + slope1 * x

# second line:
# y = c0 + c1*x; y = intercept2 + slope2 * x

# At the breakpoint (break1), the segments b and c intersect: b0 + b1*x = c0 + c1*x
b0 <- slopes[[1]]
b1 <- slopes[[2]]

c1 <- slopes[[2]] + slopes[[3]]
break1 <- segfit$psi[[2]]

# Solve for c0 (intercept of second segment):
c0 <- b0 + b1 * break1 - c1 * break1

df_figs1e %>% 
  
  ggplot(aes(x = log10_window_size, y = log10_avg_fluct)) +
  geom_point(size = 3, shape = 1) +
  geom_abline(intercept = c0, slope = c1, color = "black", lty = 2) +
  annotate("text", x = -1, y = -2, label = paste0("alpha = ", round(c1, 2)), size = 8, vjust = 1, hjust = 0) +
  
  labs(x = "log10(window length)", y = "log10(average fluctuation)") +
  ggtitle("DFA results") +
  
  theme_bw() +
  theme(# text = element_text(family = "Arial"),
        axis.text = element_text(size = 20),
        axis.title = element_text(size = 25),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 20))

```
 \newpage
 
#### F. Simulation results
```{r fig_s1f}

wn_alpha <- 1:length(l_wn_dfa) %>% purrr::map_df(~l_wn_dfa[[.x]][[2]]) %>% dplyr::rename("white" = "log10(window_size)")
pn_alpha <- 1:length(l_pn_dfa) %>% purrr::map_df(~l_pn_dfa[[.x]][[2]]) %>% dplyr::rename("pink" = "log10(window_size)")
bn_alpha <- 1:length(l_bn_dfa) %>% purrr::map_df(~l_bn_dfa[[.x]][[2]]) %>% dplyr::rename("brown" = "log10(window_size)")

df_figs1f <- bind_cols(wn_alpha, pn_alpha, bn_alpha) %>%
  pivot_longer(1:3, names_to = "noise", values_to = "alpha")

# create color vector
my_col <- c("white" = "white", "brown" = "sienna2", "pink" = "pink")

# plot
df_figs1f %>% 
  
  ggplot(aes(x = noise, y = alpha, color = noise)) +
  geom_violin(fill = "transparent") +
  geom_jitter(position = position_jitter(width = 0.1), shape = 1, size = 2) +
  stat_summary(fun = "median", geom = "crossbar", aes(color = noise), size = 0.2, width = 0.5) +

  scale_color_manual(values = my_col) +
  labs(x = "noise type", y = "alpha (\u03b1)") +
  scale_y_continuous(limits = c(0, 1.7)) +
  theme_dark() +
  theme(legend.position = "none")  +
  
  theme(# text = element_text(family = "Arial"),
        strip.text = element_text(size = 15),
        axis.title = element_text(size = 15),
        axis.text = element_text(size = 15))

```
 \newpage

### SFig 2. Logistic regression curves
```{r fig_s2, fig.height = 12}

# pivot longer and change factor levels
df_figs2 <- df_model %>% 
  mutate(status = as.numeric(ifelse(status == "CM", 1, 0))) %>% 
  pivot_longer(3:ncol(.), names_to = "variable", values_to = "value") %>% 
  mutate(variable = factor(variable, levels = c("hct", "lactate", "cerebral_hb_oxy", "cerebral_hb_tot", "cerebral_hb_oxy_alpha2", "cerebral_hb_tot_alpha2")))

# plot as individual regression curves
df_figs2 %>% 
  
  ggplot(aes(x = value, y = status)) +
  geom_point() +
  geom_smooth(method = "glm", method.args = list(family = "binomial"), color = "black") +
  facet_wrap(~variable, scales = "free_x", nrow = 5, ncol = 2) +
  
  ylab("Status: 1 = CM, 0 = UM") +
  xlab("") +
  
  theme_bw() +
  theme(# text = element_text(family = "Arial"),
        axis.text = element_text(size = 20),
        axis.title = element_text(size = 25),
        legend.text = element_text(size = 15),
        legend.title = element_text(size = 20),
        strip.text = element_text(size = 20))

```
**Figure S2.** Plotted are the individual logistic regression curves of individual variables tested for their discriminatory power between UM and CM. On the y-axis, 1 represents CM while 0 represents UM. A steeper curve indicates higher discriminatory power.

## Table of manuscript p-values

```{r fig_1_and_2_stats, eval = FALSE}

df_fig1_fig2_stats <- 
  
  df_fig1 %>% 
    group_by(tissue, variable, status) %>% 
    dplyr::select(value) %>% 
    summarise_all(funs(median = median, 
                       q1 = quantile(., probs = 0.25), 
                       q3 = quantile(., probs = 0.75)), na.rm = TRUE) %>% 
    mutate_if(is.numeric, round, 2) %>% 
  
  bind_rows(
    
    df_fig2 %>% 
      group_by(tissue, variable, status) %>% 
      dplyr::select(value) %>% 
      summarise_all(funs(median = median, 
                         q1 = quantile(., probs = 0.25), 
                         q3 = quantile(., probs = 0.75)), na.rm = TRUE) %>% 
      mutate_if(is.numeric, round, 2)
    
  )


```

```{r manuscript_stats, eval = FALSE}

# select voi
df_stats <- df_hbox %>% 
  select(c("status", 
           
           # clinical
           "age_calc", "temperature", "hr", "bp_sys", "bp_dias", "resp_rate", "pulse_ox", "hct", "lactate", "glucose", "aa_arg",   
           
           # concentrations
           matches("_hb_tot$|_hb_oxy$"),
           
           # alphas
           matches("alpha2")
           
  ))

# perform kruskal-wallis test on all variables
df_pvals <- 2:ncol(df_stats) %>% 
  purrr::map_dfr(~tibble(
    
    variable = names(df_stats[.x]),
    kw_pval = kruskal.test(df_stats[[.x]] ~ df_stats$status)$p.value,
    um_hc = DunnTest(df_stats[[.x]] ~ df_stats$status)[[1]][1, 2],
    cm_hc = DunnTest(df_stats[[.x]] ~ df_stats$status)[[1]][2, 2],
    cm_um = DunnTest(df_stats[[.x]] ~ df_stats$status)[[1]][3, 2]

  ))

df_pvals %>% knitr::kable()

# write to csv
write.csv(df_pvals, file = "Outputs/manuscript_pvals.csv")

```

```{r export_xlsx, eval = FALSE}

require(xlsx)

wb <- createWorkbook()

sheet = createSheet(wb, "Table 1a")

  addDataFrame(df_clinical_res %>% as.data.frame(), sheet = sheet, startColumn = 1, row.names = FALSE)

sheet = createSheet(wb, "Table 1b")

  addDataFrame(df_cm_voi %>% count(bcs_session) %>% as.data.frame(), sheet = sheet, startColumn = 1, row.names = FALSE)
  addDataFrame(df_cm_voi %>% count(brain_swell) %>% as.data.frame(), sheet = sheet, startColumn = 4, row.names = FALSE)
  addDataFrame(df_cm_hrp2 %>% as.data.frame(), sheet = sheet, startColumn = 7, row.names = FALSE)
  
sheet = createSheet(wb, "Figs 1 & 2 stats")

  addDataFrame(df_fig1_fig2_stats %>% as.data.frame(), sheet = sheet, startColumn = 1, row.names = FALSE)

sheet = createSheet(wb, "Table 3c & d")

  addDataFrame(table3c %>% as.data.frame(), sheet = sheet, startColumn = 1, row.names = FALSE)
  addDataFrame(fig3d_inset %>% as.data.frame(), sheet = sheet, startColumn = 7, row.names = FALSE)
  
sheet = createSheet(wb, "manuscript p-vals")

  addDataFrame(df_pvals %>% as.data.frame(), sheet = sheet, startColumn = 1, row.names = FALSE)

saveWorkbook(wb, "Outputs/all_manuscript_tables.xlsx")

```

